In [1]:
from IPython.display import Image, display, HTML
In [2]:
# Show the title image (../img/title.jpg) as this cell's output.
Image('../img/title.jpg')
Out[2]:
In [3]:
import html
import sys
sys.path.insert(0, '../scripts/')

import pickle as pkl
import pandas as pd
import numpy as np
import seaborn as sns

from visualizer import scatter_3D, display_samples, get_fulllink
In [4]:
# One CSV per year is read, covering 2008 through 2017 inclusive.
years = np.arange(2008, 2018)
csv_paths = [f'../data/dataframes2/{y}.csv' for y in years]

# Stack the yearly frames row-wise, then discard every row that has a missing value.
df = pd.concat(
    [pd.read_csv(path) for path in csv_paths],
    axis=0,
).dropna()
In [5]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load pickle files that were produced by this project.

# Precomputed coordinates, labels and colors for the 3D scatter plot.
with open("../data/transformed_data/scatter_dict_2008_2017.pkl", mode="rb") as f:
    scatter_dict = pkl.load(f)

X, labels, colors = (scatter_dict[key] for key in ('data', 'labels', 'colors'))

# Precomputed cluster assignments and the matching colors.
with open("../data/transformed_data/cluster_dict.pkl", mode="rb") as f:
    cluster_dict = pkl.load(f)

# Store the cluster assignments as a new column of df.
# Assumes they are in the same row order as df -- TODO confirm.
df['cluster'] = cluster_dict['clusters']
agg_colors = cluster_dict['agg_colors']

# Plot the first three columns of X, colored with the cluster colors.
scatter_3D(X[:, :3], agg_colors, labels)
In [6]:
# Theme label for each cluster; a label's position in the list is the cluster
# id it is matched against in df.cluster below.
themes = ['Labor Related', 'Properties and Taxes', 'Drug Cases', 'Rape',
          'Murder/Kidnapping', 'Certiorari']

# Derive the cluster count from the labels so the two cannot drift apart.
n_clusters = len(themes)
palette = sns.color_palette('hls', n_colors=n_clusters).as_hex()

for i, thm in enumerate(themes):
    # Colored heading for the cluster, followed by links to a few of its cases.
    display(HTML(f'<h3 style="color:{palette[i]};">Cluster {i}: {thm}</h3>'))

    members = df[df.cluster == i]
    # Cap the sample size: DataFrame.sample raises ValueError when asked for
    # more rows than exist. The fixed random_state keeps the picks reproducible.
    picks = members.sample(n=min(3, len(members)), random_state=42)

    for _, row in picks.iterrows():
        # Escape values that come from the data before placing them in markup,
        # so characters such as & " < > cannot break the generated link.
        href = html.escape(str(get_fulllink(row.link)), quote=True)
        text = html.escape(str(row.case_number))
        display(HTML(f'<a href="{href}">{text}</a>'))
In [7]:
# Show the image at ../img/how.jpg as this cell's output.
Image('../img/how.jpg')
Out[7]:
In [18]:
# Embed the GIF with a raw HTML <img> tag instead of IPython's Image helper.
# NOTE(review): the relative src is presumably resolved by the browser against
# the notebook's location -- confirm it still renders after export.
HTML('<img src="../img/lawphil.gif">')
Out[18]:
In [9]:
# Show the image at ../img/process.png as this cell's output.
Image('../img/process.png')
Out[9]:

Plot PCA features in 3D

In [10]:
# Plot the first three columns of X in 3D, this time with the colors stored in
# the scatter pickle rather than the cluster colors.
# Presumably these columns are the leading PCA components -- TODO confirm.
scatter_3D(X[:, :3], colors, labels)

Hierarchical Clustering

In [11]:
# Show ../img/hierarchical.png, displayed at a width of 900.
Image('../img/hierarchical.png', width=900)
Out[11]:

Value: Recommendation System

  • Suggests relevant cases in a fraction of the time and effort spent on manual research
  • Allows users to spend more of their time and effort on other important aspects of practicing law
In [12]:
# Emit the heading explicitly with display(); only the last expression of a
# cell (the image below) is shown automatically.
display(HTML('<h2>Sample Case</h2>'))
Image('../img/sample_case.png', width=900)

Sample Case

Out[12]:

Top 3 Recommendations

In [13]:
# First of the three recommendation images.
Image('../img/top_recommendation1.png', width=900)
Out[13]:
In [14]:
# Second of the three recommendation images.
Image('../img/top_recommendation2.png', width=900)
Out[14]:
In [15]:
# Third of the three recommendation images.
Image('../img/top_recommendation3.png', width=900)
Out[15]:

Limitations

  • Only used 2008 to 2017 data in building models (we have data since 1901)
  • Only used webpages in .html format (some of the cases are in pdf format)
  • Interpretations are based only on our own observations, without any validation from domain experts

Recommendations

  • Increase the amount of data (cases that are not on the website may be used to increase case variety)
  • Consult with domain experts to improve the interpretation of clusters, or even to tweak the models further
  • Analyze cases in terms of "case networks" to determine important cases that are commonly referenced
  • Build prediction models for the outcome of the case based on evidence, judge handling the case, date and other variables
In [ ]: